rm(list = ls())
library(readstata13)
library(plyr)
library(plotrix)
library(foreign)
library(stargazer)
library(ggplot2)
library(stm)
library(zoo)
library(car)
library(knitr)
library(qwraps2)
library(knitr)
library(xtable)
library(qwraps2)
library(tibble)
library(gdata)
library(tidyr)
library(data.table)
library(stringr)
library(reshape2)
library(ggrepel)
library(dfadjust)
library(lmtest)
library(summarytools)
library(kableExtra)
library(experiment)
library(AER)
library(sandwich)
library(ivpack)
library(psych)
library(remotes)
library(stats)
library(dplyr)

setwd("~/Dropbox/Participacion Politica FARC/Data_workshops/Replication/Dataverse files")
load("rawdata_farcexperiment_english.RData")

farc$treated <- as.factor(farc$treatment)
farc <- dplyr::select(farc, - start_time, -end_time)

###############* PREPARING THE DATA *###############
######### Creating index measures #########
##### Trust in institutions score #####
#subset of data that includes the variables to be standardized and the treatment indicator
a <- na.omit(dplyr::select(farc,trust_gov,trust_mayors,trust_council,trust_elec2019,trust_justice,trust_jep, treatment))

#control group
b <- a %>% filter(treatment == 0)

#equal weight***
wgts=rep(1, nrow(a))

#standardizing variables on the basis of the control group mean and standard deviation 
for(j in 1:ncol(a[,-7])){
  a[,j] <- (a[,j] - mean(b[,j]))/sd(b[,j])
}
i.vec <- as.matrix(rep(1,ncol(a[,-7])))

#the estimated (weighted) covariance matrix
Sx <- cov.wt(a[,-7], wt=wgts)[[1]]

#ICW weights
weights <- solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)

#ICW index				
a$trust_score <- as.numeric(t(solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)%*%t(a[,-7])))


a$ID <- rownames(a)
a$ID <- as.numeric(a$ID)
farc$ID <- rownames(farc)
farc$ID <- as.numeric(farc$ID)
a <- a %>% dplyr::select(trust_score, ID)

farc <- merge(farc,a, by = "ID", all.x = T)
rm(i.vec, Sx, weights, j, wgts,a,b)

##### Future political participation score #####
a <- na.omit(dplyr::select(farc,plan_vote2019, support_farc_candidate,plan_campaigning, treatment))

b <- a %>% filter(treatment == 0)
wgts=rep(1, nrow(a))

for(j in 1:ncol(a[,-4])){
  a[,j] <- (a[,j] - mean(b[,j]))/sd(b[,j])
}

i.vec <- as.matrix(rep(1,ncol(a[,-4])))
Sx <- cov.wt(a[,-4], wt=wgts)[[1]]
weights <- solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)
a$participation_score <- as.numeric(t(solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)%*%t(a[,-4])))

a$ID <- rownames(a)
a$ID <- as.numeric(a$ID)
farc$ID <- rownames(farc)
farc$ID <- as.numeric(farc$ID)
a <- a %>% dplyr::select(participation_score, ID)

farc <- merge(farc,a, by = "ID", all.x = T)
rm(i.vec, Sx, weights, j, wgts,a,b)
##### Trust in  democracy score#####
a <- na.omit(dplyr::select(farc,system_inclusive,democracy_best, farc_goals,voice_ingov,mecdem_efficient, treatment))

b <- a %>% filter(treatment == 0)
wgts=rep(1, nrow(a))

for(j in 1:ncol(a[,-6])){
  a[,j] <- (a[,j] - mean(b[,j]))/sd(b[,j])
}

i.vec <- as.matrix(rep(1,ncol(a[,-6])))
Sx <- cov.wt(a[,-6], wt=wgts)[[1]]
weights <- solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)
a$democracy_score <- as.numeric(t(solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)%*%t(a[,-6])))

a$ID <- rownames(a)
a$ID <- as.numeric(a$ID)
farc$ID <- rownames(farc)
farc$ID <- as.numeric(farc$ID)
a <- a %>% dplyr::select(democracy_score, ID)

farc <- merge(farc,a, by = "ID", all.x = T)
rm(i.vec, Sx, weights, j, wgts,a,b)

##### Moderation score#####
a <- na.omit(dplyr::select(farc,moderation_platform, moderation_alliance, treatment))
b <- a %>% filter(treatment == 0)
wgts=rep(1, nrow(a))

for(j in 1:ncol(a[,-3])){
  a[,j] <- (a[,j] - mean(b[,j]))/sd(b[,j])
}

i.vec <- as.matrix(rep(1,ncol(a[,-3])))
Sx <- cov.wt(a[,-3], wt=wgts)[[1]]
weights <- solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)
a$moderation_score <- as.numeric(t(solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)%*%t(a[,-3])))

a$ID <- rownames(a)
a$ID <- as.numeric(a$ID)
farc$ID <- rownames(farc)
farc$ID <- as.numeric(farc$ID)
a <- a %>% dplyr::select(moderation_score, ID)

farc <- merge(farc,a, by = "ID", all.x = T)
rm(i.vec, Sx, weights, j, wgts,a,b)

##### Conflict experience score#####
a <- na.omit(dplyr::select(farc, lifethreat, shot, combat, treatment))
b <- a %>% filter(treatment == 0)
wgts=rep(1, nrow(a))

for(j in 1:ncol(a[,-4])){
  a[,j] <- (a[,j] - mean(b[,j]))/sd(b[,j])
}

i.vec <- as.matrix(rep(1,ncol(a[,-4])))
Sx <- cov.wt(a[,-4], wt=wgts)[[1]]
weights <- solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)
a$conflict_score <- as.numeric(t(solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)%*%t(a[,-4])))

a$ID <- rownames(a)
a$ID <- as.numeric(a$ID)
farc$ID <- rownames(farc)
farc$ID <- as.numeric(farc$ID)
a <- a %>% dplyr::select(conflict_score, ID)

farc <- merge(farc,a, by = "ID", all.x = T)
rm(i.vec, Sx, weights, j, wgts,a,b)

######### Weights #########
#Creating weights for each observation, where each weight is the probability the individual is in the treated group or in the control group depending on which group the individual was assigned to 
farc$weight <- ifelse(farc$municipio == "Arauquita" & farc$treatment == 1, 1/22 ,NA)
farc$weight <- ifelse(farc$municipio == "Arauquita" & farc$treatment == 0, 1/10 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Bogotá" & farc$treatment == 1, 1/8 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Bogotá" & farc$treatment == 0, 1/8 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Coyaima" & farc$treatment == 1, 1/12 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Coyaima" & farc$treatment == 0, 1/17 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Florencia" & farc$treatment == 1, 1/7 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Florencia" & farc$treatment == 0, 1/6 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Iconozo" & farc$treatment == 1, 1/7 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Iconozo" & farc$treatment == 0, 1/7 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Montañita" & farc$treatment == 1, 1/14 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Montañita" & farc$treatment == 0, 1/18 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Planadas" & farc$treatment == 1, 1/13 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Planadas" & farc$treatment == 0, 1/13 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Villavicencio" & farc$treatment == 1, 1/14 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Villavicencio" & farc$treatment == 0, 1/8 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Vista Hermosa" & farc$treatment == 1, 1/61 ,farc$weight)
farc$weight <- ifelse(farc$municipio == "Vista Hermosa" & farc$treatment == 0, 1/30 ,farc$weight)


######### Imputation to address missingness in COVARIATES #########
farc$schooling <- ifelse(farc$schooling == 7, NA, farc$schooling)
farc$gender <- ifelse(farc$gender == "F",1,farc$gender)
farc$gender <- ifelse(farc$gender == "M",0,farc$gender)
farc$gender <- ifelse(farc$gender == "",NA,farc$gender)
farc$gender <- as.numeric(farc$gender)

set.seed(123456)
library(mice)
controls<- farc %>% dplyr::select(ID, municipio, gender, age, schooling, race)
controls$gender <- as.factor(controls$gender)
controls$age<- as.factor(controls$age)
controls$schooling <- as.factor(controls$schooling)
controls$race <- as.factor(controls$race)

init = mice(controls, maxit=0) 
meth = init$method
predM = init$predictorMatrix
predM[, c("ID")]=0
meth[c("gender")]="logreg"
meth[c("race")]="polyreg"
meth[c("schooling")]="polr"

imputed = mice(controls, method=meth, predictorMatrix=predM, m=5)
imputed <- complete(imputed)
imputed$age <- as.numeric(as.character(imputed$age))
imputed$schooling <- as.numeric(as.character(imputed$schooling))
imputed$gender <- as.numeric(as.character(imputed$gender))
imputed$race <- as.numeric(as.character(imputed$race))

farcs <- dplyr::select(farc, -municipio, -age, -gender, -race, -schooling)
farcs <- merge(farcs, imputed, by = "ID")

rm(imputed)

######### Imputation to address missingness in DEMOCRACY, TRUST AND PARTICIPATION outcomes #########
set.seed(123456)
library(mice)

farcregsindex<- farcs %>% dplyr::select(ID, municipio, gender, age, schooling, race, trust_gov, trust_mayors, trust_council, trust_elec2019, trust_justice, trust_jep, plan_vote2019, support_farc_candidate, plan_campaigning, system_inclusive, democracy_best, farc_goals, voice_ingov, mecdem_efficient)

farcregsindex$plan_vote2019<- as.factor(farc$plan_vote2019)
farcregsindex$support_farc_candidate <- as.factor(farc$support_farc_candidate)
farcregsindex$plan_campaigning<- as.factor(farc$plan_campaigning)

init = mice(farcregsindex, maxit=0) 
meth = init$method
predM = init$predictorMatrix
predM[, c("ID")]=0
meth[c("plan_vote2019")]="logreg"
meth[c("support_farc_candidate")]="logreg"
meth[c("plan_campaigning")]="logreg"

imputed = mice(farcregsindex, method=meth, predictorMatrix=predM, m=5)
imputed <- complete(imputed)

f <- farcs %>% dplyr::select(ID, treatment,  weight, assignment)

imputed <- merge(imputed, f, by = "ID")
rm(f)

imputed$plan_vote2019 <- as.numeric(as.character(imputed$plan_vote2019))
imputed$support_farc_candidate <- as.numeric(as.character(imputed$support_farc_candidate))
imputed$plan_campaigning <- as.numeric(as.character(imputed$plan_campaigning))

######### Creating index measures of DEMOCRACY, TRUST AND PARTICIPATION with imputed components #########
##### Trust in institutions score #####
#subset of data that includes the variables to be standardized and the treatment indicator
a <- na.omit(imputed[c(7:12,21)])

#control group
b <- a %>% filter(treatment== 0)

#equal weight***
wgts=rep(1, nrow(a))

#standardizing variables on the basis of the control group mean and standard deviation 
for(j in 1:ncol(a[,-7])){
  a[,j] <- (a[,j] - mean(b[,j]))/sd(b[,j])
}
i.vec <- as.matrix(rep(1,ncol(a[,-7])))

#the estimated (weighted) covariance matrix
Sx <- cov.wt(a[,-7], wt=wgts)[[1]]

#ICW weights
weights <- solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)

#ICW index				
a$trust_score <- as.numeric(t(solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)%*%t(a[,-7])))


a$ID <- rownames(a)
a$ID <- as.numeric(a$ID)
a <- a %>% dplyr::select(trust_score, ID)

imputed <- merge(imputed,a, by = "ID", all.x = T)
rm(i.vec, Sx, weights, j, wgts,a,b)

##### Future political participation #####
a <- na.omit(imputed[c(13:15,21)])
b <- a %>% filter(treatment == 0)
wgts=rep(1, nrow(a))

for(j in 1:ncol(a[,-4])){
  a[,j] <- (a[,j] - mean(b[,j]))/sd(b[,j])
}

i.vec <- as.matrix(rep(1,ncol(a[,-4])))
Sx <- cov.wt(a[,-4], wt=wgts)[[1]]
weights <- solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)
a$participation_score <- as.numeric(t(solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)%*%t(a[,-4])))

a$ID <- rownames(a)
a$ID <- as.numeric(a$ID)
a <- a %>% dplyr::select(participation_score, ID)

imputed <- merge(imputed,a, by = "ID", all.x = T)
rm(i.vec, Sx, weights, j, wgts,a,b)


##### Trust in  democracy #####
a <- na.omit(imputed[c(16:20,21)])
b <- a %>% filter(treatment == 0)
wgts=rep(1, nrow(a))

for(j in 1:ncol(a[,-6])){
  a[,j] <- (a[,j] - mean(b[,j]))/sd(b[,j])
}

i.vec <- as.matrix(rep(1,ncol(a[,-6])))
Sx <- cov.wt(a[,-6], wt=wgts)[[1]]
weights <- solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)
a$democracy_score <- as.numeric(t(solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)%*%t(a[,-6])))

a$ID <- rownames(a)
a$ID <- as.numeric(a$ID)
a <- a %>% dplyr::select(democracy_score, ID)

imputed <- merge(imputed,a, by = "ID", all.x = T)
rm(i.vec, Sx, weights, j, wgts,a,b)

######### Imputation to address missingness in MODERATION OUTCOME #########
farcregsindex<- farcs %>% dplyr::select(ID, municipio, gender, age, schooling, race, moderation_platform,moderation_alliance)

init = mice(farcregsindex, maxit=0) 
meth = init$method
predM = init$predictorMatrix
predM[, c("ID")]=0

imputed_mod = mice(farcregsindex, method=meth, predictorMatrix=predM, m=5)
imputed_mod <- complete(imputed_mod)

imputed_mod$moderation_alliance <- ifelse(imputed_mod$municipio == "Coyaima",NA, imputed_mod$moderation_alliance)
imputed_mod$moderation_alliance <- ifelse(imputed_mod$municipio == "Planadas",NA, imputed_mod$moderation_alliance)


f <- farcs %>% dplyr::select(ID, treatment, weight, assignment)

imputed_mod <- merge(imputed_mod, f, by = "ID")
rm(f)

######### Creating index measures of MODERATION with imputed components #########
a <- na.omit(imputed_mod[c(7,8,9)])
b <- a %>% filter(treatment == 0)
wgts=rep(1, nrow(a))

for(j in 1:ncol(a[,-3])){
  a[,j] <- (a[,j] - mean(b[,j]))/sd(b[,j])
}

i.vec <- as.matrix(rep(1,ncol(a[,-3])))
Sx <- cov.wt(a[,-3], wt=wgts)[[1]]
weights <- solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)
a$moderation_score <- as.numeric(t(solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)%*%t(a[,-3])))

a$ID <- rownames(a)
a$ID <- as.numeric(a$ID)

a <- a %>% dplyr::select(moderation_score, ID)

imputed_mod <- merge(imputed_mod,a, by = "ID", all.x = T)
rm(i.vec, Sx, weights, j, wgts,a,b)



######### Imputation to address missingness in CONFLICT outcome #########
farcregsindex<- farcs %>% dplyr::select(ID,  municipio, gender, age, schooling, race, lifethreat, shot, combat)

farcregsindex$lifethreat <- as.factor(farcregsindex$lifethreat)
farcregsindex$shot <- as.factor(farcregsindex$shot)
farcregsindex$combat <- as.factor(farcregsindex$combat)

init = mice(farcregsindex, maxit=0) 
meth = init$method
predM = init$predictorMatrix
predM[, c("ID")]=0
meth[c("lifethreat")]="polr"
meth[c("shot")]="polr"
meth[c("combat")]="polr"

imputed_con = mice(farcregsindex, method=meth, predictorMatrix=predM, m=5)
imputed_con <- complete(imputed_con)

f <- farcs %>% dplyr::select(ID, treatment, weight, assignment)

imputed_con <- merge(imputed_con, f, by = "ID")
rm(f)

######### Creating index measures of CONFLICT with imputed components #########
imputed_con$lifethreat <- as.numeric(as.character(imputed_con$lifethreat))
imputed_con$shot <- as.numeric(as.character(imputed_con$shot))
imputed_con$combat <- as.numeric(as.character(imputed_con$combat))

a <- na.omit(imputed_con[c(7,8,9,10)])
b <- a %>% filter(treatment == 0)
wgts=rep(1, nrow(a))

for(j in 1:ncol(a[,-4])){
  a[,j] <- (a[,j] - mean(b[,j]))/sd(b[,j])
}

i.vec <- as.matrix(rep(1,ncol(a[,-4])))
Sx <- cov.wt(a[,-4], wt=wgts)[[1]]
weights <- solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)
a$conflict_score <- as.numeric(t(solve(t(i.vec)%*%solve(Sx)%*%i.vec)%*%t(i.vec)%*%solve(Sx)%*%t(a[,-4])))

a$ID <- rownames(a)
a$ID <- as.numeric(a$ID)

a <- a %>% dplyr::select(conflict_score, ID)

imputed_con <- merge(imputed_con,a, by = "ID", all.x = T)
rm(i.vec, Sx, weights, j, wgts,a,b)


######### Imputation to address missingness in POSTURING outcomes #########
farcregsposturing<- farcs %>% dplyr::select(ID, municipio, gender, age, schooling, race,  sat_implementationfarc, ideology_personal, farc_loyalrevolutionary)

farcregsposturing$sat_implementationfarc <- as.factor(farc$sat_implementationfarc)
farcregsposturing$ideology_personal <- as.factor(farc$ideology_personal)
farcregsposturing$farc_loyalrevolutionary <- as.factor(farc$farc_loyalrevolutionary)

init = mice(farcregsposturing, maxit=0) 
meth = init$method
predM = init$predictorMatrix
predM[, c("ID")]=0

imputed_post = mice(farcregsposturing, method=meth, predictorMatrix=predM, m=5)
imputed_post <- complete(imputed_post)

f <- farcs %>% dplyr::select(ID, treatment, weight, assignment)

imputed_post <- merge(imputed_post, f, by = "ID")
rm(f)

imputed_post$sat_implementationfarc <- as.numeric(as.character(imputed_post$sat_implementationfarc))
imputed_post$ideology_personal <- as.numeric(as.character(imputed_post$ideology_personal))
imputed_post$farc_loyalrevolutionary <- as.numeric(as.character(imputed_post$farc_loyalrevolutionary))

######### Imputation to address missingness in SOCIAL DESIRABILITY (ELECTION) outcome #########
farcregssd<- farcs %>% dplyr::select(ID, municipio, gender, age, schooling, race, voted2016)

farcregssd$voted2016 <- as.factor(farc$voted2016)

init = mice(farcregssd, maxit=0) 
meth = init$method
predM = init$predictorMatrix
predM[, c("ID")]=0
meth[c("voted2016")]="logreg"

imputed_sd = mice(farcregssd, method=meth, predictorMatrix=predM, m=5)
imputed_sd <- complete(imputed_sd)

f <- farcs %>% dplyr::select(ID, treatment, weight, assignment)

imputed_sd <- merge(imputed_sd, f, by = "ID")
rm(f)

imputed_sd$voted2016 <- as.numeric(as.character(imputed_sd$voted2016))

######### Merging in imputed outcomes #########
imputed <- merge(imputed, imputed_mod[,c(1,7:8,12)], by = "ID") #ID, moderation_platform, moderation_alliance, moderation_score
imputed <- merge(imputed, imputed_post[,c(1,7:9)], by = "ID") #ID, IMPLEMFARC, IDEOLOGIAPERSONAL, FARCFIELALAIDEASREVOL
imputed <- merge(imputed, imputed_sd[,c(1,7)], by = "ID") #ID, ELECCIONES2016
imputed <- merge(imputed, imputed_con[,c(1,7:9,13)], by = "ID") #"ID, VIDAAMENAZADA, LEDISPARARONBOMBARDEARON, ENFRENTAMIENTOS, conflict_score

imputed$schooling <- as.numeric(imputed$schooling)
imputed$schooling <- as.numeric(imputed$schooling)
rm(farcs)

#Coding race variables
#mestizo is the reference level, and not making dummy for "other" category which is RAZA = 4 because only 4 observations
imputed$blanco <- ifelse(imputed$race== 0,1,0)
imputed$negro <-  ifelse(imputed$race == 2,1,0)
imputed$indigena <-  ifelse(imputed$race == 3,1,0)

farc$blanco <- ifelse(farc$race == 0,1,0)
farc$negro <-  ifelse(farc$race == 2,1,0)
farc$indigena <-  ifelse(farc$race == 3,1,0)
farc$mestizo <-  ifelse(farc$race == 1,1,0)

imputed <- merge(imputed, dplyr::select(farc, ID, org_belongs), by = "ID")
imputed <- merge(imputed, dplyr::select(farc, ID, pdr), by = "ID")

#Coding schooling variables
#measure of no schooling and measure of ages 18-24 
imputed$noneorbasic <- ifelse(imputed$schooling == 0,1,imputed$schooling)
imputed$noneorbasic <- ifelse(imputed$noneorbasic == 1,1,0)

#Coding age variable
imputed$between18y40 <- ifelse(imputed$age > 40 ,0,1)


save(farc,file = "data_notimputed.RData")
save(imputed,file = "data_imputed.RData")

